The final report summarizing all findings and analyses is available
as TimeSeries_FinalReport.pdf.
Number of cars made in Spain (monthly data).
# Read the monthly car-production data (one value per line), convert to
# thousands of units, and build a monthly time series starting January 1994.
# NOTE(review): window() is called without start/end here, so it is a no-op.
# NOTE(review): read.table() returns a one-column data.frame, which is why
# later var() calls print a 1x1 matrix labelled "V1".
serie=window(ts(read.table("Turismos.dat")/1000,start=1994,freq=12))
serie
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 1994 138.425 166.842 179.400 223.446 187.183 183.394 168.756 25.152 184.762
## 1995 188.898 187.161 216.639 164.914 218.173 212.086 190.483 31.298 197.755
## 1996 181.594 179.934 190.851 183.442 208.492 194.059 196.897 41.464 173.298
## 1997 188.668 162.265 182.918 204.807 204.510 200.507 189.052 36.428 207.655
## 1998 189.017 197.430 220.661 203.018 213.316 228.743 208.647 60.188 213.379
## 1999 196.592 219.682 237.449 184.858 228.762 228.788 220.227 40.069 219.635
## 2000 192.199 232.617 255.023 200.635 260.528 252.363 224.501 61.962 210.429
## 2001 234.075 223.152 238.185 185.781 238.777 230.345 209.111 56.720 177.481
## 2002 201.087 208.120 188.543 217.216 232.923 219.179 212.444 82.742 215.172
## 2003 207.006 212.705 226.428 220.719 233.278 236.176 231.517 47.827 214.580
## 2004 194.092 216.004 243.359 208.072 237.175 257.235 217.272 68.403 244.797
## 2005 167.391 193.393 191.860 211.320 217.392 224.185 178.916 62.231 217.278
## 2006 180.421 188.678 210.103 153.392 226.924 216.732 158.268 70.959 206.780
## 2007 201.613 210.081 218.483 181.890 245.369 227.389 194.897 90.993 221.064
## 2008 212.170 222.210 182.465 228.808 215.425 190.450 201.466 48.071 194.813
## 2009 98.923 118.040 165.315 154.059 158.651 177.560 175.170 70.643 197.654
## 2010 165.867 185.721 201.279 158.219 186.972 195.533 175.408 46.060 172.672
## 2011 156.137 178.803 196.789 152.708 187.311 194.090 162.124 48.990 193.592
## 2012 130.764 163.556 157.821 120.766 161.929 139.823 135.140 42.400 113.375
## 2013 141.505 170.571 144.180 154.760 175.503 163.706 168.988 55.773 162.059
## 2014 133.361 171.602 173.105 161.608 183.129 177.938 185.277 38.383 180.748
## 2015 175.463 201.128 214.138 177.533 204.959 215.479 214.675 46.727 208.080
## 2016 176.924 232.724 217.183 232.322 229.409 228.938 193.223 78.514 214.231
## 2017 185.033 226.722 246.506 161.142 231.152 209.286 183.542 56.640 208.777
## 2018 185.429 211.958 220.623 222.549 246.673 237.661 189.627 76.969 160.246
## 2019 184.174 200.660 210.198 188.326 229.168 209.779 186.358 97.569 184.371
## Oct Nov Dec
## 1994 192.724 203.301 154.518
## 1995 178.876 174.189 121.486
## 1996 201.346 188.741 123.503
## 1997 224.210 200.754 153.423
## 1998 224.265 216.919 168.454
## 1999 205.875 219.729 165.995
## 2000 221.817 246.391 161.519
## 2001 188.981 190.019 118.284
## 2002 219.626 221.530 147.884
## 2003 250.982 224.027 161.019
## 2004 217.813 226.977 125.871
## 2005 188.788 199.915 124.800
## 2006 207.433 228.585 140.751
## 2007 229.800 218.435 121.190
## 2008 156.407 136.327 69.464
## 2009 191.606 188.592 136.989
## 2010 167.846 182.858 114.095
## 2011 157.900 166.372 89.608
## 2012 132.607 128.997 88.635
## 2013 166.270 152.295 99.769
## 2014 193.392 173.023 125.211
## 2015 201.475 213.905 144.129
## 2016 206.200 225.181 121.751
## 2017 202.825 227.069 142.182
## 2018 192.225 209.633 115.870
## 2019 207.221 198.337 141.364
# Plot the raw series with dotted vertical guides at each year boundary.
plot(serie, main="Cars made in Spain",ylab="Thousands of Units")
abline(v=1994:2020,lty=3,col=4)
# One boxplot per calendar year to inspect how level and spread evolve.
boxplot(serie~floor(time(serie)))
# matrix(serie, nr = 12)
# length(serie)/12 # 26
# Mean-variance plot: one point per year (columns of the 12 x 26 matrix).
# NOTE(review): matrix(serie,nrow=12) assumes the series starts in January
# and spans whole years, which holds here (1994-2019, 26 full years).
m<-apply(matrix(serie,nrow=12),2,mean)
v<-apply(matrix(serie,nrow=12),2,var)
plot(v~m) # variance grows with the mean, so we take the logarithm
abline(lm(v~m),col=2,lty=3)
It is not constant: the variance increases with the mean, so we take the logarithm of the series.
# Log-transform to stabilize the variance (variance grew with the mean).
lnserie <- log(serie)
# Fixed: the y-axis label now reflects the log scale (it previously still
# read "Thousands of Units", which describes the untransformed series).
plot(lnserie, main="ln Cars made in Spain",ylab="log(Thousands of Units)")
abline(v=1994:2020,lty=3,col=4)
Let’s check the same plots for the transformed series.
# Year-by-year boxplots of the log series: the spread is now roughly constant.
matrixlnserie <- matrix(lnserie,nrow=12)
boxplot(matrixlnserie)
# Mean-variance plot for the transformed series.
# Fixed: matrixlnserie is already a 12-row matrix, so the redundant
# matrix(..., nrow = 12) re-wrap has been dropped (behavior is identical).
lnm<-apply(matrixlnserie,2,mean) # yearly means of ln(serie)
lnv<-apply(matrixlnserie,2,var) # yearly variances of ln(serie)
# Fixed stale comment: after the log transform the variance no longer grows
# with the mean, so no further variance-stabilizing transformation is needed.
plot(lnv~lnm)
abline(lm(lnv~lnm),col=2,lty=3)
The first transformation is done.
Yes, seasonality is clearly present, with a sharp drop every August.
# Month subseries plot: strong seasonal pattern with a pronounced dip in August.
monthplot(lnserie) # August!!
# Overlay the 12 monthly values per year, one line per year (colors recycled 1:8).
ts.plot(matrix(lnserie,nrow=12),col=1:8)
# Seasonal differencing (lag 12) to remove the yearly pattern.
d12lnserie<-diff(lnserie,lag=12)
plot(d12lnserie)
abline(h=0)
abline(h=mean(d12lnserie),col=2)
# Seasonality check on the differenced series: monthly profiles are now flat.
monthplot(d12lnserie)
ts.plot(matrix(d12lnserie,nrow=12),col=1:8)
# Mean seemingly constant equal to zero!!!
# Same plot repeated to read off the mean level of the differenced series.
plot(d12lnserie)
abline(h=0)
abline(h=mean(d12lnserie),col=2)
mean(d12lnserie)
## [1] 0.00735298
# Variance of the log series before differencing (printed as a 1x1 matrix
# because lnserie originates from a one-column data.frame).
var(lnserie) # 0.164901
## V1
## V1 0.164901
# Variance after the seasonal difference: much smaller, so the difference helps.
var(d12lnserie) # 0.03583941
## V1
## V1 0.03583941
Check for over-differencing: take one extra regular difference and compare the variances.
# Apply one extra regular difference; if the variance increases, the series
# was already stationary and the extra difference should not be kept.
d1d12lnserie <- diff(d12lnserie)
var(d1d12lnserie) # 0.04014573 > 0.03583941, so keep only the seasonal difference
## V1
## V1 0.04014573
Is the current series already stationary? Yes.
par(mar = c(5, 4, 4, 2))
# ACF/PACF of the stationary (seasonally differenced) series; lag 0 in red.
acf(d12lnserie,ylim=c(-1,1),lag.max=60,col=c(2,rep(1,3)), main ="ACF(serie)")
pacf(d12lnserie,ylim=c(-1,1),lag.max=60,col=c(2,rep(1,3)), main ="PACF(serie)")
# the ACF plot is also useful for identifying non-stationary time series. For a stationary time series, the ACF will drop to zero relatively quickly, while the ACF of non-stationary data decreases slowly.
# Side-by-side ACF/PACF over 6 years of lags; seasonal lags (multiples
# of 12) highlighted in red to identify the seasonal orders.
par(mfrow=c(1,2))
acf(d12lnserie, ylim=c(-1,1), lag.max = 72,col=c(2,rep(1,11)),lwd=2)
pacf(d12lnserie, ylim=c(-1,1), lag.max = 72,col=c(rep(1,11),2),lwd=2)
# Candidate 1: AR(3) regular part with a seasonal MA(1) and one seasonal
# difference (D = 1), fitted on the log series.
(mod1=arima(lnserie,order=c(3,0,0),seasonal=list(order=c(0,1,1),period=12))) #ar(3) for regular part
##
## Call:
## arima(x = lnserie, order = c(3, 0, 0), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sma1
## 0.2302 0.2949 0.2604 -0.6868
## s.e. 0.0560 0.0551 0.0562 0.0487
##
## sigma^2 estimated as 0.01689: log likelihood = 182.45, aic = -354.89
# Candidate 2: MA(6) regular part, same seasonal part as model 1.
(mod2=arima(lnserie,order=c(0,0,6),seasonal=list(order=c(0,1,1),period=12))) #ma(6) for regular part
##
## Call:
## arima(x = lnserie, order = c(0, 0, 6), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 sma1
## 0.2306 0.3329 0.3308 0.1566 0.2014 0.2190 -0.6372
## s.e. 0.0625 0.0638 0.0648 0.0597 0.0631 0.0678 0.0548
##
## sigma^2 estimated as 0.01756: log likelihood = 177.2, aic = -338.41
# Candidate 3: parsimonious ARMA(1,1) regular part, same seasonal part.
(mod3=arima(lnserie,order=c(1,0,1),seasonal=list(order=c(0,1,1),period=12))) #simplest model arma(1,1) and seasonal part is the same
##
## Call:
## arima(x = lnserie, order = c(1, 0, 1), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ar1 ma1 sma1
## 0.9286 -0.6160 -0.7012
## s.e. 0.0289 0.0542 0.0491
##
## sigma^2 estimated as 0.01747: log likelihood = 177.31, aic = -346.63
#################Validation#################################
# Graphical and statistical validation of a fitted arima() model:
# residual diagnostics, normality / homoscedasticity / independence tests,
# causality-invertibility root checks and AR(inf) / MA(inf) weights.
#
# model: an object returned by stats::arima().
# Side effects: produces several plots and printed output; no useful return value.
#
# NOTE(review): uses get(model$series) to fetch the original series by name
# from the calling environment -- the object used in the arima() call must
# still exist under that same name, or this function fails.
validation=function(model){
s=frequency(get(model$series)) # seasonal period of the modeled series
resi=model$residuals
par(mfrow=c(2,2),mar=c(3,3,3,3))
#Residuals plot (with +/- 3 sd reference bands)
plot(resi,main="Residuals")
abline(h=0)
abline(h=c(-3*sd(resi),3*sd(resi)),lty=3,col=4)
#Square Root of absolute values of residuals (Homocedasticity)
scatter.smooth(sqrt(abs(resi)),main="Square Root of Absolute residuals",
lpars=list(col=2))
#Normal plot of residuals
qqnorm(resi)
qqline(resi,col=2,lwd=2)
##Histogram of residuals with normal curve
# NOTE(review): add=T should ideally be add=TRUE (T is reassignable).
hist(resi,breaks=20,freq=FALSE)
curve(dnorm(x,mean=mean(resi),sd=sd(resi)),col=2,add=T)
#ACF & PACF of residuals (should show no significant structure if adequate)
par(mfrow=c(1,2))
acf(resi,ylim=c(-1,1),lag.max=60,col=c(2,rep(1,s-1)),lwd=2)
pacf(resi,ylim=c(-1,1),lag.max=60,col=c(rep(1,s-1),2),lwd=2)
par(mfrow=c(1,1))
#Ljung-Box p-values
par(mar=c(2,2,1,1))
tsdiag(model,gof.lag=7*s)
cat("\n--------------------------------------------------------------------\n")
print(model)
#Stationary and Invertible: all root moduli should exceed 1
cat("\nModul of AR Characteristic polynomial Roots: ",
Mod(polyroot(c(1,-model$model$phi))),"\n")
cat("\nModul of MA Characteristic polynomial Roots: ",
Mod(polyroot(c(1,model$model$theta))),"\n")
suppressMessages(require(forecast,quietly=TRUE,warn.conflicts=FALSE))
# Root plot from the forecast package (dispatches to its Arima plot method).
plot(model)
#Model expressed as an MA infinity (psi-weights)
psis=ARMAtoMA(ar=model$model$phi,ma=model$model$theta,lag.max=36)
names(psis)=paste("psi",1:36)
cat("\nPsi-weights (MA(inf))\n")
cat("\n--------------------\n")
print(psis[1:24])
#Model expressed as an AR infinity (pi-weights)
pis=-ARMAtoMA(ar=-model$model$theta,ma=-model$model$phi,lag.max=36)
names(pis)=paste("pi",1:36)
cat("\nPi-weights (AR(inf))\n")
cat("\n--------------------\n")
print(pis[1:24])
cat("\nDescriptive Statistics for the Residuals\n")
cat("\n----------------------------------------\n")
suppressMessages(require(fBasics,quietly=TRUE,warn.conflicts=FALSE))
##Anderson-Darling test
#print(basicStats(resi))
## Add here complementary tests (use with caution!)
##---------------------------------------------------------
cat("\nNormality Tests\n")
cat("\n--------------------\n")
##Shapiro-Wilks Normality test
print(shapiro.test(resi))
suppressMessages(require(nortest,quietly=TRUE,warn.conflicts=FALSE))
##Anderson-Darling test
print(ad.test(resi))
suppressMessages(require(tseries,quietly=TRUE,warn.conflicts=FALSE))
##Jarque-Bera test
print(jarque.bera.test(resi))
cat("\nHomoscedasticity Test\n")
cat("\n--------------------\n")
suppressMessages(require(lmtest,quietly=TRUE,warn.conflicts=FALSE))
##Breusch-Pagan test (residuals regressed on fitted values obs - resi)
obs=get(model$series)
print(bptest(resi~I(obs-resi)))
cat("\nIndependence Tests\n")
cat("\n--------------------\n")
##Durbin-Watson test
print(dwtest(resi~I(1:length(resi))))
##Ljung-Box test at lags 1:4 and the first four seasonal lags (s, 2s, 3s, 4s)
cat("\nLjung-Box test\n")
print(t(apply(matrix(c(1:4,(1:4)*s)),1,function(el) {
te=Box.test(resi,type="Ljung-Box",lag=el)
c(lag=(te$parameter),statistic=te$statistic[[1]],p.value=te$p.value)})))
}
################# End Validation #################################
Perform the complete analysis of residuals, justifying all assumptions made. Use the corresponding tests and graphical results.
Include analysis of the expressions of the AR and MA infinite models, discuss if they are causal and/or invertible and report some adequacy measures.
#Model1
# Full residual diagnostics for the AR(3) candidate.
validation(mod1)
##
## --------------------------------------------------------------------
##
## Call:
## arima(x = lnserie, order = c(3, 0, 0), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sma1
## 0.2302 0.2949 0.2604 -0.6868
## s.e. 0.0560 0.0551 0.0562 0.0487
##
## sigma^2 estimated as 0.01689: log likelihood = 182.45, aic = -354.89
##
## Modul of AR Characteristic polynomial Roots: 1.1234 1.848768 1.848768
##
## Modul of MA Characteristic polynomial Roots: 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318 1.0318
## Warning: package 'forecast' was built under R version 4.3.3
##
## Psi-weights (MA(inf))
##
## --------------------
## psi 1 psi 2 psi 3 psi 4 psi 5 psi 6
## 0.23022066 0.34787210 0.40840855 0.25655905 0.27009123 0.24419654
## psi 7 psi 8 psi 9 psi 10 psi 11 psi 12
## 0.20267823 0.18900854 0.16687506 0.14693583 0.13225882 -0.56959985
## psi 13 psi 14 psi 15 psi 16 psi 17 psi 18
## -0.05386706 -0.14591458 -0.19782062 -0.10259722 -0.11995287 -0.10938812
## psi 19 psi 20 psi 21 psi 22 psi 23 psi 24
## -0.08727398 -0.08358764 -0.07346672 -0.06429037 -0.05823337 -0.05149723
##
## Pi-weights (AR(inf))
##
## --------------------
## pi 1 pi 2 pi 3 pi 4 pi 5 pi 6 pi 7
## 0.2302207 0.2948705 0.2604359 0.0000000 0.0000000 0.0000000 0.0000000
## pi 8 pi 9 pi 10 pi 11 pi 12 pi 13 pi 14
## 0.0000000 0.0000000 0.0000000 0.0000000 -0.6868359 0.1581238 0.2025277
## pi 15 pi 16 pi 17 pi 18 pi 19 pi 20 pi 21
## 0.1788767 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000
## pi 22 pi 23 pi 24
## 0.0000000 0.0000000 -0.4717435
##
## Descriptive Statistics for the Residuals
##
## ----------------------------------------
## Warning: package 'fBasics' was built under R version 4.3.3
##
## Normality Tests
##
## --------------------
##
## Shapiro-Wilk normality test
##
## data: resi
## W = 0.9614, p-value = 2.365e-07
##
##
## Anderson-Darling normality test
##
## data: resi
## A = 3.239, p-value = 3.951e-08
## Warning: package 'tseries' was built under R version 4.3.3
##
## Jarque Bera Test
##
## data: resi
## X-squared = 69.227, df = 2, p-value = 8.882e-16
##
##
## Homoscedasticity Test
##
## --------------------
##
## studentized Breusch-Pagan test
##
## data: resi ~ I(obs - resi)
## BP = 63.288, df = 1, p-value = 1.786e-15
##
##
## Independence Tests
##
## --------------------
##
## Durbin-Watson test
##
## data: resi ~ I(1:length(resi))
## DW = 1.9993, p-value = 0.4748
## alternative hypothesis: true autocorrelation is greater than 0
##
##
## Ljung-Box test
## lag.df statistic p.value
## [1,] 1 2.822741e-04 0.9865954
## [2,] 2 2.457423e-02 0.9877881
## [3,] 3 3.006077e-01 0.9599141
## [4,] 4 7.550554e-01 0.9443699
## [5,] 12 1.217097e+01 0.4320488
## [6,] 24 2.699509e+01 0.3046819
## [7,] 36 3.571434e+01 0.4820668
## [8,] 48 4.870003e+01 0.4446744
#Model2
# Full residual diagnostics for the MA(6) candidate.
validation(mod2)
##
## --------------------------------------------------------------------
##
## Call:
## arima(x = lnserie, order = c(0, 0, 6), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 sma1
## 0.2306 0.3329 0.3308 0.1566 0.2014 0.2190 -0.6372
## s.e. 0.0625 0.0638 0.0648 0.0597 0.0631 0.0678 0.0548
##
## sigma^2 estimated as 0.01756: log likelihood = 177.2, aic = -338.41
##
## Modul of AR Characteristic polynomial Roots:
##
## Modul of MA Characteristic polynomial Roots: 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.038267 1.24714 1.394994 1.24714 1.228322 1.394994 1.228322
##
## Psi-weights (MA(inf))
##
## --------------------
## psi 1 psi 2 psi 3 psi 4 psi 5 psi 6
## 0.23061319 0.33293338 0.33075479 0.15655891 0.20137021 0.21897816
## psi 7 psi 8 psi 9 psi 10 psi 11 psi 12
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 -0.63722575
## psi 13 psi 14 psi 15 psi 16 psi 17 psi 18
## -0.14695266 -0.21215372 -0.21076547 -0.09976337 -0.12831829 -0.13953852
## psi 19 psi 20 psi 21 psi 22 psi 23 psi 24
## 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##
## Pi-weights (AR(inf))
##
## --------------------
## pi 1 pi 2 pi 3 pi 4 pi 5 pi 6
## 0.230613187 0.279750941 0.189461703 -0.056548300 0.022699360 0.079668709
## pi 7 pi 8 pi 9 pi 10 pi 11 pi 12
## -0.143721084 -0.091446467 0.008932882 0.071260999 0.012325823 -0.640935806
## pi 13 pi 14 pi 15 pi 16 pi 17 pi 18
## 0.168622505 0.177494946 0.096684184 -0.054905787 0.021732242 0.059896448
## pi 19 pi 20 pi 21 pi 22 pi 23 pi 24
## -0.090691559 -0.055955742 0.009769632 0.044642815 0.002338096 -0.410783349
##
## Descriptive Statistics for the Residuals
##
## ----------------------------------------
##
## Normality Tests
##
## --------------------
##
## Shapiro-Wilk normality test
##
## data: resi
## W = 0.96208, p-value = 2.942e-07
##
##
## Anderson-Darling normality test
##
## data: resi
## A = 3.0447, p-value = 1.174e-07
##
##
## Jarque Bera Test
##
## data: resi
## X-squared = 61.108, df = 2, p-value = 5.373e-14
##
##
## Homoscedasticity Test
##
## --------------------
##
## studentized Breusch-Pagan test
##
## data: resi ~ I(obs - resi)
## BP = 59.127, df = 1, p-value = 1.478e-14
##
##
## Independence Tests
##
## --------------------
##
## Durbin-Watson test
##
## data: resi ~ I(1:length(resi))
## DW = 1.9466, p-value = 0.2979
## alternative hypothesis: true autocorrelation is greater than 0
##
##
## Ljung-Box test
## lag.df statistic p.value
## [1,] 1 0.1951259 0.65868417
## [2,] 2 0.7163975 0.69893416
## [3,] 3 1.5321608 0.67486778
## [4,] 4 2.7683592 0.59730673
## [5,] 12 21.4989198 0.04353483
## [6,] 24 37.2400059 0.04140534
## [7,] 36 45.5395482 0.13242378
## [8,] 48 58.8672871 0.13524952
#Model3
# Full residual diagnostics for the ARMA(1,1) candidate.
validation(mod3)
##
## --------------------------------------------------------------------
##
## Call:
## arima(x = lnserie, order = c(1, 0, 1), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ar1 ma1 sma1
## 0.9286 -0.6160 -0.7012
## s.e. 0.0289 0.0542 0.0491
##
## sigma^2 estimated as 0.01747: log likelihood = 177.31, aic = -346.63
##
## Modul of AR Characteristic polynomial Roots: 1.076841
##
## Modul of MA Characteristic polynomial Roots: 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.030027 1.623367
##
## Psi-weights (MA(inf))
##
## --------------------
## psi 1 psi 2 psi 3 psi 4 psi 5 psi 6
## 0.31263886 0.29032975 0.26961256 0.25037370 0.23250767 0.21591651
## psi 7 psi 8 psi 9 psi 10 psi 11 psi 12
## 0.20050926 0.18620143 0.17291457 0.16057583 0.14911755 -0.56268581
## psi 13 psi 14 psi 15 psi 16 psi 17 psi 18
## -0.09061517 -0.08414910 -0.07814444 -0.07256825 -0.06738996 -0.06258119
## psi 19 psi 20 psi 21 psi 22 psi 23 psi 24
## -0.05811555 -0.05396858 -0.05011752 -0.04654126 -0.04322019 -0.04013611
##
## Pi-weights (AR(inf))
##
## --------------------
## pi 1 pi 2 pi 3 pi 4 pi 5 pi 6
## 0.312638862 0.192586693 0.118634113 0.073079051 0.045016965 0.027730617
## pi 7 pi 8 pi 9 pi 10 pi 11 pi 12
## 0.017082162 0.010522675 0.006482007 0.003992940 0.002459666 -0.699647548
## pi 13 pi 14 pi 15 pi 16 pi 17 pi 18
## 0.220144058 0.135609552 0.083535985 0.051458475 0.031698610 0.019526461
## pi 19 pi 20 pi 21 pi 22 pi 23 pi 24
## 0.012028372 0.007409521 0.004564293 0.002811621 0.001731969 -0.490562249
##
## Descriptive Statistics for the Residuals
##
## ----------------------------------------
##
## Normality Tests
##
## --------------------
##
## Shapiro-Wilk normality test
##
## data: resi
## W = 0.96045, p-value = 1.749e-07
##
##
## Anderson-Darling normality test
##
## data: resi
## A = 3.3745, p-value = 1.85e-08
##
##
## Jarque Bera Test
##
## data: resi
## X-squared = 64.233, df = 2, p-value = 1.132e-14
##
##
## Homoscedasticity Test
##
## --------------------
##
## studentized Breusch-Pagan test
##
## data: resi ~ I(obs - resi)
## BP = 63.125, df = 1, p-value = 1.94e-15
##
##
## Independence Tests
##
## --------------------
##
## Durbin-Watson test
##
## data: resi ~ I(1:length(resi))
## DW = 2.1708, p-value = 0.9274
## alternative hypothesis: true autocorrelation is greater than 0
##
##
## Ljung-Box test
## lag.df statistic p.value
## [1,] 1 2.342953 0.125850766
## [2,] 2 3.601771 0.165152581
## [3,] 3 8.910397 0.030506241
## [4,] 4 10.888066 0.027851224
## [5,] 12 27.619745 0.006285618
## [6,] 24 48.146257 0.002420947
## [7,] 36 59.152196 0.008850240
## [8,] 48 79.423263 0.002910022
# model 1
# Out-of-sample check for model 1: refit on data up to Dec 2018 (modB below),
# keep the full-sample fit (modA), and compare 2019 forecasts against reality.
ultim=c(2018,12) # last observation kept in the training window
pdq=c(3,0,0) # regular part of model 1
PDQ=c(0,1,1) # seasonal part of model 1
serie2=window(serie,end=ultim) # training sample (1994-2018)
lnserie2=log(serie2)
serie1=window(serie,end=ultim+c(1,0)) # full sample (1994-2019)
lnserie1=log(serie1)
(modA=arima(lnserie1,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie1, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sma1
## 0.2302 0.2949 0.2604 -0.6868
## s.e. 0.0560 0.0551 0.0562 0.0487
##
## sigma^2 estimated as 0.01689: log likelihood = 182.45, aic = -354.89
# Model 1 refitted on the training sample only (for out-of-sample evaluation).
(modB=arima(lnserie2,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie2, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sma1
## 0.2400 0.2903 0.2671 -0.7061
## s.e. 0.0571 0.0562 0.0571 0.0463
##
## sigma^2 estimated as 0.0166: log likelihood = 177.15, aic = -344.31
# 12-month-ahead forecast of 2019 from the training fit; predictions are made
# on the log scale and back-transformed with exp(). 95% intervals use 1.96*se.
pred=predict(modB,n.ahead=12)
pr<-ts(c(tail(lnserie2,1),pred$pred),start=ultim,freq=12)
se<-ts(c(0,pred$se),start=ultim,freq=12)
#Intervals
tl<-ts(exp(pr-1.96*se),start=ultim,freq=12)
tu<-ts(exp(pr+1.96*se),start=ultim,freq=12)
pr<-ts(exp(pr),start=ultim,freq=12)
ts.plot(serie,tl,tu,pr,lty=c(1,2,2,1),col=c(1,4,4,2),xlim=ultim[1]+c(-2,+2),type="o",main=paste("Model ARIMA(",paste(pdq,collapse=","),")(",paste(PDQ,collapse=","),")12",sep=""))
abline(v=(ultim[1]-2):(ultim[1]+2),lty=3,col=4)
# Tabulate 2019 predictions vs. observed values with errors.
obs=window(serie,start=ultim+c(0,1))
pr=window(pr,start=ultim+c(0,1))
ts(data.frame(LowLim=tl[-1],Predic=pr,UpperLim=tu[-1],Observ=obs,Error=obs-pr,PercentError=(obs-pr)/obs),start=ultim+c(0,1),freq=12)
## LowLim Predic UpperLim V1 obs X.obs...pr.
## Jan 2019 132.81638 170.96741 220.07719 184.174 13.206588 0.071707126
## Feb 2019 157.09131 203.67074 264.06152 200.660 -3.010741 -0.015004189
## Mar 2019 156.48287 205.83601 270.75464 210.198 4.361986 0.020751798
## Apr 2019 138.13052 185.33374 248.66768 188.326 2.992257 0.015888708
## May 2019 160.08345 216.42180 292.58738 229.168 12.746199 0.055619453
## Jun 2019 153.15289 208.73764 284.49612 209.779 1.041359 0.004964077
## Jul 2019 134.60204 184.68647 253.40695 186.358 1.671530 0.008969458
## Aug 2019 43.95660 60.58826 83.51277 97.569 36.980739 0.379021397
## Sep 2019 131.87894 182.49773 252.54541 184.371 1.873269 0.010160324
## Oct 2019 137.30169 190.59409 264.57145 207.221 16.626913 0.080237585
## Nov 2019 144.95486 201.70469 280.67207 198.337 -3.367687 -0.016979619
## Dec 2019 87.20219 121.58145 169.51466 141.364 19.782547 0.139940485
# Out-of-sample accuracy measures for model 1 over the 12 months of 2019.
mod.RMSE1=sqrt(sum((obs-pr)^2)/12)
mod.MAE1=sum(abs(obs-pr))/12
mod.RMSPE1=sqrt(sum(((obs-pr)/obs)^2)/12)
mod.MAPE1=sum(abs(obs-pr)/obs)/12
data.frame("RMSE"=mod.RMSE1,"MAE"=mod.MAE1,"RMSPE"=mod.RMSPE1,"MAPE"=mod.MAPE1)
## RMSE MAE RMSPE MAPE
## 1 14.22449 9.805151 0.1222426 0.06827035
# Average width of the 95% prediction intervals (model 1).
mCI1=mean(tu-tl)
cat("\nMean Length CI: ",mCI1)
##
## Mean Length CI: 100.5549
# Final 2020 forecast for model 1, using the full-sample fit (modA).
pred=predict(modA,n.ahead=12)
pr<-ts(c(tail(lnserie1,1),pred$pred),start=ultim+c(1,0),freq=12)
se<-ts(c(0,pred$se),start=ultim+c(1,0),freq=12)
# 95% prediction intervals, back-transformed from the log scale.
tl1<-ts(exp(pr-1.96*se),start=ultim+c(1,0),freq=12)
tu1<-ts(exp(pr+1.96*se),start=ultim+c(1,0),freq=12)
pr1<-ts(exp(pr),start=ultim+c(1,0),freq=12)
ts.plot(serie,tl1,tu1,pr1,lty=c(1,2,2,1),col=c(1,4,4,2),xlim=c(ultim[1]-2,ultim[1]+3),type="o",main=paste("Model ARIMA(",paste(pdq,collapse=","),")(",paste(PDQ,collapse=","),")12",sep=""))
abline(v=(ultim[1]-2):(ultim[1]+3),lty=3,col=4)
(previs2=window(cbind(tl1,pr1,tu1),start=ultim+c(1,0)))
## tl1 pr1 tu1
## Dec 2019 141.36400 141.36400 141.3640
## Jan 2020 142.59293 183.95313 237.3102
## Feb 2020 165.01772 214.30542 278.3144
## Mar 2020 170.47125 224.64588 296.0369
## Apr 2020 145.73734 195.72627 262.8618
## May 2020 171.43637 231.89284 313.6691
## Jun 2020 160.93564 219.37912 299.0462
## Jul 2020 140.12612 192.19701 263.6175
## Aug 2020 53.12838 73.17676 100.7906
## Sep 2020 136.39483 188.54170 260.6255
## Oct 2020 145.17373 201.23515 278.9457
## Nov 2020 148.50243 206.28974 286.5640
## Dec 2020 93.72635 130.42278 181.4869
# Repeat the out-of-sample exercise for model 2 (MA(6) regular part).
ultim=c(2018,12)
pdq=c(0,0,6)
PDQ=c(0,1,1)
serie2=window(serie,end=ultim) # training sample (1994-2018)
lnserie2=log(serie2)
serie1=window(serie,end=ultim+c(1,0)) # full sample (1994-2019)
lnserie1=log(serie1)
(modA=arima(lnserie1,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie1, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 sma1
## 0.2306 0.3329 0.3308 0.1566 0.2014 0.2190 -0.6372
## s.e. 0.0625 0.0638 0.0648 0.0597 0.0631 0.0678 0.0548
##
## sigma^2 estimated as 0.01756: log likelihood = 177.2, aic = -338.41
# Model 2 refitted on the training sample only (for out-of-sample evaluation).
(modB=arima(lnserie2,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie2, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 sma1
## 0.2441 0.3516 0.3648 0.1617 0.1854 0.2432 -0.6632
## s.e. 0.0638 0.0637 0.0647 0.0614 0.0606 0.0698 0.0532
##
## sigma^2 estimated as 0.01723: log likelihood = 172.24, aic = -328.48
# 12-month-ahead forecast of 2019 from the model-2 training fit,
# back-transformed from the log scale; 95% intervals use 1.96*se.
pred=predict(modB,n.ahead=12)
pr<-ts(c(tail(lnserie2,1),pred$pred),start=ultim,freq=12)
se<-ts(c(0,pred$se),start=ultim,freq=12)
#Intervals
tl<-ts(exp(pr-1.96*se),start=ultim,freq=12)
tu<-ts(exp(pr+1.96*se),start=ultim,freq=12)
pr<-ts(exp(pr),start=ultim,freq=12)
ts.plot(serie,tl,tu,pr,lty=c(1,2,2,1),col=c(1,4,4,2),xlim=ultim[1]+c(-2,+2),type="o",main=paste("Model ARIMA(",paste(pdq,collapse=","),")(",paste(PDQ,collapse=","),")12",sep=""))
abline(v=(ultim[1]-2):(ultim[1]+2),lty=3,col=4)
# Tabulate 2019 predictions vs. observed values with errors.
obs=window(serie,start=ultim+c(0,1))
pr=window(pr,start=ultim+c(0,1))
ts(data.frame(LowLim=tl[-1],Predic=pr,UpperLim=tu[-1],Observ=obs,Error=obs-pr,PercentError=(obs-pr)/obs),start=ultim+c(0,1),freq=12)
## LowLim Predic UpperLim V1 obs X.obs...pr.
## Jan 2019 129.64238 167.68033 216.87886 184.174 16.4936745 0.089554848
## Feb 2019 161.39408 210.33153 274.10764 200.660 -9.6715264 -0.048198577
## Mar 2019 148.61567 196.60990 260.10348 210.198 13.5880994 0.064644285
## Apr 2019 139.83503 187.84892 252.34889 188.326 0.4770829 0.002533282
## May 2019 166.52515 224.35700 302.27305 229.168 4.8109951 0.020993311
## Jun 2019 158.26227 214.03436 289.46071 209.779 -4.2553610 -0.020284971
## Jul 2019 137.54664 187.21568 254.82055 186.358 -0.8576759 -0.004602303
## Aug 2019 45.83506 62.38642 84.91459 97.569 35.1825788 0.360591774
## Sep 2019 134.99659 183.74479 250.09630 184.371 0.6262080 0.003396456
## Oct 2019 141.95359 193.21400 262.98491 207.221 14.0069968 0.067594485
## Nov 2019 151.15746 205.74146 280.03612 198.337 -7.4044584 -0.037332714
## Dec 2019 90.41749 123.06787 167.50852 141.364 18.2961307 0.129425672
# Out-of-sample accuracy measures for model 2 over the 12 months of 2019.
mod.RMSE2=sqrt(sum((obs-pr)^2)/12)
mod.MAE2=sum(abs(obs-pr))/12
mod.RMSPE2=sqrt(sum(((obs-pr)/obs)^2)/12)
mod.MAPE2=sum(abs(obs-pr)/obs)/12
data.frame("RMSE"=mod.RMSE2,"MAE"=mod.MAE2,"RMSPE"=mod.RMSPE2,"MAPE"=mod.MAPE2)
## RMSE MAE RMSPE MAPE
## 1 14.1904 10.47257 0.1183757 0.07076272
# Average width of the 95% prediction intervals (model 2).
mCI2=mean(tu-tl)
cat("\nMean Length CI: ",mCI2)
##
## Mean Length CI: 99.18094
Prediction for model 2 (not the final prediction; the code is kept commented out).
# pred=predict(modA,n.ahead=12)
# pr<-ts(c(tail(lnserie1,1),pred$pred),start=ultim+c(1,0),freq=12)
# se<-ts(c(0,pred$se),start=ultim+c(1,0),freq=12)
# tl1<-ts(exp(pr-1.96*se),start=ultim+c(1,0),freq=12)
# tu1<-ts(exp(pr+1.96*se),start=ultim+c(1,0),freq=12)
# pr1<-ts(exp(pr),start=ultim+c(1,0),freq=12)
# ts.plot(serie,tl1,tu1,pr1,lty=c(1,2,2,1),col=c(1,4,4,2),xlim=c(ultim[1]-2,ultim[1]+3),type="o",main=paste("Model ARIMA(",paste(pdq,collapse=","),")(",paste(PDQ,collapse=","),")12",sep=""))
# abline(v=(ultim[1]-2):(ultim[1]+3),lty=3,col=4)
# (previs3=window(cbind(tl1,pr1,tu1),start=ultim+c(1,0)))
# Repeat the out-of-sample exercise for model 3 (ARMA(1,1) regular part).
ultim=c(2018,12)
pdq=c(1,0,1)
PDQ=c(0,1,1)
serie2=window(serie,end=ultim) # training sample (1994-2018)
lnserie2=log(serie2)
serie1=window(serie,end=ultim+c(1,0)) # full sample (1994-2019)
lnserie1=log(serie1)
(modA=arima(lnserie1,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie1, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ar1 ma1 sma1
## 0.9286 -0.6160 -0.7012
## s.e. 0.0289 0.0542 0.0491
##
## sigma^2 estimated as 0.01747: log likelihood = 177.31, aic = -346.63
# Model 3 refitted on the training sample only (for out-of-sample evaluation).
(modB=arima(lnserie2,order=pdq,seasonal=list(order=PDQ,period=12)))
##
## Call:
## arima(x = lnserie2, order = pdq, seasonal = list(order = PDQ, period = 12))
##
## Coefficients:
## ar1 ma1 sma1
## 0.9296 -0.6071 -0.7206
## s.e. 0.0287 0.0549 0.0465
##
## sigma^2 estimated as 0.01722: log likelihood = 171.8, aic = -335.61
# 12-month-ahead forecast of 2019 from the model-3 training fit,
# back-transformed from the log scale; 95% intervals use 1.96*se.
pred=predict(modB,n.ahead=12)
pr<-ts(c(tail(lnserie2,1),pred$pred),start=ultim,freq=12)
se<-ts(c(0,pred$se),start=ultim,freq=12)
#Intervals
tl<-ts(exp(pr-1.96*se),start=ultim,freq=12)
tu<-ts(exp(pr+1.96*se),start=ultim,freq=12)
pr<-ts(exp(pr),start=ultim,freq=12)
ts.plot(serie,tl,tu,pr,lty=c(1,2,2,1),col=c(1,4,4,2),xlim=ultim[1]+c(-2,+2),type="o",main=paste("Model ARIMA(",paste(pdq,collapse=","),")(",paste(PDQ,collapse=","),")12",sep=""))
abline(v=(ultim[1]-2):(ultim[1]+2),lty=3,col=4)
# Tabulate 2019 predictions vs. observed values with errors.
obs=window(serie,start=ultim+c(0,1))
pr=window(pr,start=ultim+c(0,1))
ts(data.frame(LowLim=tl[-1],Predic=pr,UpperLim=tu[-1],Observ=obs,Error=obs-pr,PercentError=(obs-pr)/obs),start=ultim+c(0,1),freq=12)
## LowLim Predic UpperLim V1 obs X.obs...pr.
## Jan 2019 131.68690 170.30963 220.26011 184.174 13.86436819 7.527864e-02
## Feb 2019 155.83773 204.18969 267.54388 200.660 -3.52969060 -1.759040e-02
## Mar 2019 157.86459 209.08802 276.93229 210.198 1.10997659 5.280624e-03
## Apr 2019 138.92328 185.66332 248.12882 188.326 2.66268171 1.413868e-02
## May 2019 161.36627 217.29241 292.60136 229.168 11.87559039 5.182046e-02
## Jun 2019 154.78605 209.76490 284.27183 209.779 0.01409777 6.720295e-05
## Jul 2019 136.21776 185.60159 252.88883 186.358 0.75640899 4.058903e-03
## Aug 2019 44.25372 60.57497 82.91568 97.569 36.99402647 3.791576e-01
## Sep 2019 133.62752 183.62864 252.33932 184.371 0.74235934 4.026443e-03
## Oct 2019 138.57856 191.07056 263.44593 207.221 16.15044249 7.793825e-02
## Nov 2019 145.81965 201.63106 278.80388 198.337 -3.29405997 -1.660840e-02
## Dec 2019 87.83719 121.75469 168.76910 141.364 19.60931286 1.387150e-01
# Out-of-sample accuracy measures for model 3 over the 12 months of 2019.
mod.RMSE3=sqrt(sum((obs-pr)^2)/12)
mod.MAE3=sum(abs(obs-pr))/12
mod.RMSPE3=sqrt(sum(((obs-pr)/obs)^2)/12)
mod.MAPE3=sum(abs(obs-pr)/obs)/12
# Fixed: the RMSE column previously displayed mod.RMSE1 (model 1's RMSE,
# 14.22449) instead of mod.RMSE3 for this model-3 summary.
data.frame("RMSE"=mod.RMSE3,"MAE"=mod.MAE3,"RMSPE"=mod.RMSPE3,"MAPE"=mod.MAPE3)
## Expected output after the fix (rerun to confirm):
##       RMSE      MAE     RMSPE       MAPE
## 1 14.08287 9.216918 0.1218861 0.06539005
# Average width of the 95% prediction intervals (model 3).
mCI3=mean(tu-tl)
cat("\nMean Length CI: ",mCI3)
##
## Mean Length CI: 100.1617
# Model comparison table: number of parameters, innovation variance,
# AIC/BIC (full-sample fits) and 2019 out-of-sample accuracy measures.
resul=data.frame(
par=c(length(coef(mod1)),length(coef(mod2)), length(coef(mod3))),
Sigma2Z=c(mod1$sigma2,mod2$sigma2, mod3$sigma2),
AIC=c(AIC(mod1),AIC(mod2), AIC(mod3)),
BIC=c(BIC(mod1),BIC(mod2), BIC(mod3)),
# Fixed: the RMSE vector was c(mod.RMSE2, mod.RMSE3, mod.RMSE1), which
# misaligned the RMSE values with the model rows (all other columns are
# ordered model 1, 2, 3).
RMSE=c(mod.RMSE1,mod.RMSE2, mod.RMSE3),
MAE=c(mod.MAE1,mod.MAE2, mod.MAE3),
RMSPE=c(mod.RMSPE1,mod.RMSPE2, mod.RMSPE3),
MAPE=c(mod.MAPE1,mod.MAPE2, mod.MAPE3),
meanLength=c(mCI1,mCI2, mCI3)
)
# Fixed row labels: model 1's seasonal part is (0,1,1), not (0,0,1), and
# "ARMIA" was a typo for "ARIMA".
row.names(resul)=c("ARIMA(3,0,0)(0,1,1)12","ARIMA(0,0,6)(0,1,1)12", "ARIMA(1,0,1)(0,1,1)12")
resul
## Expected output after the fixes (rerun to confirm):
##                       par    Sigma2Z       AIC       BIC     RMSE       MAE
## ARIMA(3,0,0)(0,1,1)12   4 0.01688502 -354.8938 -336.3749 14.22449  9.805151
## ARIMA(0,0,6)(0,1,1)12   7 0.01755732 -338.4078 -308.7776 14.19040 10.472566
## ARIMA(1,0,1)(0,1,1)12   3 0.01746909 -346.6285 -331.8134 14.08287  9.216918
##                           RMSPE       MAPE meanLength
## ARIMA(3,0,0)(0,1,1)12 0.1222426 0.06827035  100.55493
## ARIMA(0,0,6)(0,1,1)12 0.1183757 0.07076272   99.18094
## ARIMA(1,0,1)(0,1,1)12 0.1218861 0.06539005  100.16168
Based on the analysis above — Model 1 has the best AIC and BIC, and it is the only candidate whose residuals pass the Ljung-Box independence tests — we chose Model 1; its final 2020 predictions appear in the corresponding section above.